For this Seurat analysis, we used the following parameters:
pars <- as.data.frame(do.call("rbind", params))
colnames(pars) <- "parameters_used"
pars
Starting with this Seurat object from SCBA output (integrated on source name).
path <- "/fast/groups/cubi/work/milekm/mireille/de/rerun/sc-batch-analysis-rerun/out_rds"
sobj <- readRDS(file.path(path,params$path_to_object))
DimPlot(sobj, label=T, repel =T)
Look at some quality control parameters.
DefaultAssay(sobj) <- "RNA"
FeaturePlot(sobj, features = c("pct.mito", "pct.ribo", "Malat1", "Neat1"))
Show % cells per cluster per replicate to get an idea of reproducibility.
clusters <- sobj[[]] %>%
pull(seurat_clusters) %>%
unique() %>% as.character() %>%
as.numeric() %>%
sort() %>%
as.character()
sobj[[]] %>%
select(sample, group, seurat_clusters,age,strain,compartment,timepoint) %>%
group_by(sample, group, seurat_clusters,age,strain,compartment,timepoint) %>%
summarise(ncells=n()) %>%
ungroup() %>%
group_by(sample) %>%
mutate(cells_in_sample=sum(ncells), cell_proportion=ncells/sum(ncells)*100) %>%
mutate(seurat_clusters = factor(seurat_clusters, levels = clusters)) %>%
ggplot(aes(seurat_clusters, cell_proportion, fill=timepoint)) +
geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2)+
facet_wrap(~age+strain+compartment, scales = "free", ncol = 1) +
theme(axis.text.x = element_text(angle=45,vjust=1,hjust=1))+
xlab("")+ylab("% cells_in_sample")
Which proteins were detected in the ADT assay?
# which proteins are detected in ADT?
GetAssayData(sobj, assay = "Antibody.Capture", slot = "data") %>% rownames()
## [1] "CD335" "CD3" "CD4" "CD8a" "CD19" "HTO7" "HTO8" "HTO9" "HTO10"
## [10] "HTO11" "HTO12" "HTO13" "HTO14" "HTO15" "HTO16" "HTO17" "HTO18" "HTO19"
## [19] "HTO20"
Possible outliers according to cell proportion analysis: 12w_nonSPF_5days_fx, 26w_nonSPF_5days_fx, 26w SPF 5days fx
Show RNA and ADT signal for the detected molecules.
DefaultAssay(sobj) <- "Antibody.Capture"
sobj <- sobj %>%
NormalizeData(normalization.method = "CLR", margin = 2, assay = "Antibody.Capture")
DefaultAssay(sobj) <- "Antibody.Capture"
sobj %>%
FeaturePlot(features=c("CD19"))
DefaultAssay(sobj) <- "RNA"
sobj %>%
FeaturePlot(features=c("Cd19"))
DefaultAssay(sobj) <- "RNA"
sobj %>%
FeaturePlot(features=c("Foxp3"))
sobj %>%
FeaturePlot(features=c("Cd3e"))
DefaultAssay(sobj) <- "Antibody.Capture"
sobj %>%
FeaturePlot(features=c("CD3"))
sobj %>%
FeaturePlot(features=c("CD4"))
sobj %>%
FeaturePlot(features=c("CD335"))
Show dotplot for gene expression in clusters.
marks <- c("Siglech","S100a8","Csf1r","Cd3g","Klrd1","Cd79a","Vpreb1","Ebf1","Cd74","Ptprc","Tfrc","Hbb-bt","Kit","Flt3","Gata1"," Itga2b","Mpo","Elane","Ms4a2","Cd4","Cd8a","Cd19","Cd34","Cd27","Itga4","Itgax","Siglecf","Ccr3","Ly96","Chit1","Ctsk","Nfatc1","Atp6v0d2","Acp5","Ocstamp","Cd14", "Cxcr4", "Cx3cr1", "Ccr2","Mt3","Mmp9","Vcam1","C1qa","Cd5l","C1qc","Pf4")
Idents(sobj) <- "seurat_clusters"
DotPlot(sobj, assay = "RNA", features = marks) +
theme(axis.text.x = element_text(angle=90,hjust=1,vjust=.5))
Show de novo markers per cluster (RNA assay).
DefaultAssay(sobj) <- "RNA"
Idents(sobj) <- "seurat_clusters"
markers <- FindAllMarkers(sobj, only.pos = TRUE, min.pct = 0.25,
thresh.use = 0.25,
print.bar=FALSE, verbose=FALSE)
write.csv(markers,'310124_cluster_markers_rna.csv',row.names=FALSE)
markers <- read.csv('310124_cluster_markers_rna.csv',
stringsAsFactors=FALSE)
top5 <- markers %>%
group_by(cluster) %>%
top_n(5, avg_log2FC)
top10 <- markers %>%
group_by(cluster) %>%
top_n(10, avg_log2FC)
DotPlot(sobj, features = unique(top5$gene)) +
theme_classic() +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5))+
ggtitle("top5 markers per cluster")
Show de novo markers for the ADT assay.
DefaultAssay(sobj) <- "Antibody.Capture"
Idents(sobj) <- "seurat_clusters"
markers <- FindAllMarkers(sobj, only.pos = TRUE, min.pct = 0.25,
thresh.use = 0.25,
print.bar=FALSE, verbose=FALSE)
write.csv(markers,'310124_cluster_markers_adt.csv',row.names=FALSE)
markers <- read.csv('310124_cluster_markers_adt.csv',
stringsAsFactors=FALSE)
top5 <- markers %>%
group_by(cluster) %>%
top_n(5, avg_log2FC)
top10 <- markers %>%
group_by(cluster) %>%
top_n(10, avg_log2FC)
DotPlot(sobj, features = unique(top5$gene)) +
theme_classic() +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5))+
ggtitle("top5 markers per cluster")
Perform label transfer from the Tabula Muris, droplet dataset (FACS does not work for the label transfer here.)
DefaultAssay(sobj) <- "RNA"
#this is from tabula muris https://figshare.com/articles/dataset/Robject_files_for_tissues_processed_by_Seurat/5821263
load("references_bone_marrow/droplet_Marrow_seurat_tiss.Robj")
ref1 <- UpdateSeuratObject(tiss)
# load("references_bone_marrow/facs_Marrow_seurat_tiss.Robj")
# ref2<-UpdateSeuratObject(tiss)
anchors <- FindTransferAnchors(reference = ref1, query = sobj, dims = 1:20, reference.reduction = "pca")
predictions <- TransferData(anchorset = anchors, refdata = ref1$cell_ontology_class, dims = 1:20)
sobj <- AddMetaData(sobj, metadata = predictions)
# DimPlot(sobj, group.by="predicted.id")
DimPlot(sobj, group.by="predicted.id", label = T, repel = T)
Show distributions of label transfer scores.
cols <- colnames(sobj[[]])[grepl("predict", colnames(sobj[[]]))]
sobj[[]] %>%
tibble::rownames_to_column("cells") %>%
select(all_of(c("cells", cols))) %>%
mutate(cell_id = paste0(cells, "_", predicted.id)) %>%
select(-cells, -predicted.id) %>%
pivot_longer(-cell_id, values_to = "score") %>%
group_by(cell_id) %>%
summarise(max_score = max(score)) %>%
mutate(cell_type = sub(".*_", "", cell_id)) %>%
ggplot(aes(cell_type, max_score, fill = cell_type)) +
geom_boxplot() +
theme(axis.text.x=element_text(angle=90, hjust=1,vjust=.5))
Lets look at cell proportions again, this time by annotated cell type.
sobj[[]] %>%
select(sample, group, predicted.id,age,strain,compartment,timepoint) %>%
group_by(sample, group, predicted.id,age,strain,compartment,timepoint) %>%
summarise(ncells=n()) %>%
ungroup() %>%
group_by(sample) %>%
mutate(cells_in_sample=sum(ncells), cell_proportion=ncells/sum(ncells)*100) %>%
ggplot(aes(predicted.id, cell_proportion, fill=timepoint)) +
geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2)+
facet_wrap(~age+strain+compartment, scales = "free", ncol = 1) +
theme(axis.text.x = element_text(angle=45,vjust=1,hjust=1))+
xlab("")+ylab("% cells_in_sample")
Another dotplot of the cell markers after cell annotation.
Idents(sobj) <- "predicted.id"
DotPlot(sobj, assay = "RNA", features = marks) +
theme(axis.text.x = element_text(angle=90,hjust=1,vjust=.5))
Test for significant differences in cell proportions.
samples <- sobj[[]] %>%
pull(sample) %>%
unique()
cell_types <- sobj[[]] %>%
pull(predicted.id) %>%
unique()
sample_id <- expand_grid(samples, cell_types) %>%
mutate(sample_id = paste0(samples,";", cell_types)) %>%
select(-samples, -cell_types)
ncells <- sobj[[]] %>%
select(sample, predicted.id) %>%
group_by(sample, predicted.id,.drop = FALSE) %>%
summarise(ncells=n()) %>%
ungroup() %>%
mutate(sample_id=paste0(sample,";",predicted.id)) %>%
select(-sample, -predicted.id)
tot_cells <- sobj[[]] %>%
select(sample) %>%
group_by(sample,.drop = FALSE) %>%
summarise(tot_cells=n()) %>%
ungroup()
df <- sample_id %>%
left_join(ncells) %>%
mutate(ncells=ifelse(is.na(ncells), 0, ncells)) %>%
mutate(sample = sub(";.*", "", sample_id)) %>%
left_join(tot_cells) %>%
mutate(group=sub(".*-","",sub("_","-",sub("_","-",sub(";.*","",sample_id))))) %>%
mutate(predicted.id=sub(".*;", "", sample_id)) %>%
mutate(cell_proportion=ncells/tot_cells*100) %>%
mutate(age=sub("_.*","",group)) %>%
mutate(strain=sub("_.*","",sub(".*;", "", sub("_",";", group)))) %>%
mutate(compartment=sub(".*_", "", group)) %>%
mutate(timepoint=sub("_.*", "",sub(".*;", "",sub("_", ";",sub("_", ";", group))))) %>%
mutate(group_cell_type=paste0(group,";",predicted.id))
day5 <- df %>%
filter(timepoint=="5days") %>%
pull(group_cell_type) %>% unique()
day2 <- df %>%
filter(timepoint=="2days") %>%
pull(group_cell_type) %>% unique()
comparisons_paired <- Map(c, day5,day2)
names(comparisons_paired) <- NULL
res_ttest <- df %>%
pairwise_t_test(cell_proportion~group_cell_type, comparisons=comparisons_paired, paired = T, p.adjust.method = "BH") %>%
mutate(condition1=sub(";.*","", group1),
condition2=sub(";.*","", group2),
cell_type=sub(".*;","",group1)) %>%
select(-group1,-group2)
DT::datatable(res_ttest,rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# res_ttest_unpaired <- df %>%
# split(f=.$predicted.id) %>%
# lapply(function(x) x %>%
# pairwise_t_test(cell_proportion~group, p.adjust.method = "BH", paired=F)) %>%
# bind_rows(.id="cell_type")
#
# DT::datatable(res_ttest,rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# cell_type <- "immature B cell"
#
# df %>%
# filter(predicted.id== cell_type) %>%
# mutate(age_strain=paste0(age,"_", strain)) %>%
# mutate(compartment=factor(compartment, levels=c("fx", "proximal", "distal"))) %>%
# ggplot(aes(age_strain, cell_proportion, fill=timepoint)) +
# geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
# geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2)+
# facet_wrap(~compartment, scales = "free_x")+
# scale_fill_manual(values=c("dodgerblue3", "orange2"))+
# theme(axis.text.x = element_text(angle=45,vjust=1,hjust=1))+
# xlab("")+ylab("% cells_in_sample")+
# ggtitle(cell_type)
plot_proportions <- function(cell_type){
df %>%
filter(predicted.id== cell_type) %>%
mutate(age_strain=paste0(age,"_", strain)) %>%
mutate(compartment=factor(compartment, levels=c("fx", "proximal", "distal"))) %>%
ggplot(aes(age_strain, cell_proportion, fill=timepoint)) +
geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2)+
facet_wrap(~compartment, scales = "free_x")+
scale_fill_manual(values=c("dodgerblue3", "orange2"))+
theme(axis.text.x = element_text(angle=45,vjust=1,hjust=1))+
xlab("")+ylab("% cells_in_sample")+
ggtitle(cell_type)
}
lapply(unique(df$predicted.id), plot_proportions)
saveRDS(sobj, "sobj.annotated.310124.rerun.rds")
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Rocky Linux 8.7 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /data/cephfs-1/work/groups/cubi/users/milekm_c/miniconda/envs/R-fixed-biomart/lib/libopenblasp-r0.3.24.so; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] stringr_1.5.0 ggrepel_0.9.3
## [3] DESeq2_1.40.2 SummarizedExperiment_1.30.2
## [5] Biobase_2.60.0 MatrixGenerics_1.12.2
## [7] matrixStats_1.0.0 GenomicRanges_1.52.0
## [9] GenomeInfoDb_1.36.1 IRanges_2.34.1
## [11] S4Vectors_0.38.1 BiocGenerics_0.46.0
## [13] rstatix_0.7.2 cowplot_1.1.1
## [15] ggplot2_3.4.3 gtools_3.9.4
## [17] tidyr_1.3.0 dplyr_1.1.3
## [19] SeuratObject_4.1.4 Seurat_4.4.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.7
## [4] magrittr_2.0.3 spatstat.utils_3.0-3 farver_2.1.1
## [7] rmarkdown_2.25 zlibbioc_1.46.0 vctrs_0.6.3
## [10] ROCR_1.0-11 spatstat.explore_3.2-3 RCurl_1.98-1.12
## [13] S4Arrays_1.0.4 htmltools_0.5.6 broom_1.0.5
## [16] sass_0.4.7 sctransform_0.4.0 parallelly_1.36.0
## [19] KernSmooth_2.23-22 bslib_0.5.1 htmlwidgets_1.6.2
## [22] ica_1.0-3 plyr_1.8.9 plotly_4.10.2
## [25] zoo_1.8-12 cachem_1.0.8 igraph_1.5.1
## [28] mime_0.12 lifecycle_1.0.3 pkgconfig_2.0.3
## [31] Matrix_1.6-1.1 R6_2.5.1 fastmap_1.1.1
## [34] GenomeInfoDbData_1.2.10 fitdistrplus_1.1-11 future_1.33.0
## [37] shiny_1.7.5 digest_0.6.33 colorspace_2.1-0
## [40] patchwork_1.1.3 tensor_1.5 irlba_2.3.5.1
## [43] crosstalk_1.2.0 labeling_0.4.3 progressr_0.14.0
## [46] fansi_1.0.4 spatstat.sparse_3.0-2 httr_1.4.7
## [49] polyclip_1.10-6 abind_1.4-5 compiler_4.3.1
## [52] withr_2.5.1 backports_1.4.1 BiocParallel_1.34.2
## [55] carData_3.0-5 MASS_7.3-60 DelayedArray_0.26.6
## [58] tools_4.3.1 lmtest_0.9-40 httpuv_1.6.11
## [61] future.apply_1.11.0 goftest_1.2-3 glue_1.6.2
## [64] nlme_3.1-163 promises_1.2.1 grid_4.3.1
## [67] Rtsne_0.16 cluster_2.1.4 reshape2_1.4.4
## [70] generics_0.1.3 gtable_0.3.4 spatstat.data_3.0-1
## [73] data.table_1.14.8 XVector_0.40.0 sp_2.1-0
## [76] car_3.1-2 utf8_1.2.3 spatstat.geom_3.2-5
## [79] RcppAnnoy_0.0.21 RANN_2.6.1 pillar_1.9.0
## [82] limma_3.56.2 later_1.3.1 splines_4.3.1
## [85] lattice_0.21-9 survival_3.5-7 deldir_1.0-9
## [88] tidyselect_1.2.0 locfit_1.5-9.8 miniUI_0.1.1.1
## [91] pbapply_1.7-2 knitr_1.44 gridExtra_2.3
## [94] scattermore_1.2 xfun_0.40 DT_0.28
## [97] stringi_1.7.12 lazyeval_0.2.2 yaml_2.3.7
## [100] evaluate_0.22 codetools_0.2-19 tibble_3.2.1
## [103] cli_3.6.1 uwot_0.1.16 xtable_1.8-4
## [106] reticulate_1.32.0 munsell_0.5.0 jquerylib_0.1.4
## [109] Rcpp_1.0.11 globals_0.16.2 spatstat.random_3.1-6
## [112] png_0.1-8 parallel_4.3.1 ellipsis_0.3.2
## [115] bitops_1.0-7 listenv_0.9.0 viridisLite_0.4.2
## [118] scales_1.2.1 ggridges_0.5.4 crayon_1.5.2
## [121] leiden_0.4.3 purrr_1.0.2 rlang_1.1.1